import os

import pandas as pd


# Flatten the hierarchical classification sheet into one record per
# 4-character leaf code, then export the 2-, 3- and 4-level views.
#
# Columns read from the input sheet:
#   '门类'     - level code: alphabetic (level 1), 2 chars (level 2),
#                3 chars (level 3), or empty on rows that carry only a leaf
#   '1396小类' - leaf code; a row is a leaf when this is 4 characters long
#   '20000类'  - display name attached to whichever code the row carries
data = pd.read_excel('data/国民经济行业分类.xlsx')

li = []    # flat leaf records, in sheet order
tree = {}  # nested mirror of the hierarchy: level 1 -> level 2 -> level 3 -> leaf

# NOTE(review): rows are assumed to list each parent level before its
# children; a child row met before its parent raises NameError below.
for _, row in data.iterrows():
    level_code = str(row['门类'])
    leaf_code = str(row['1396小类'])
    label = row['20000类']

    # Empty cells arrive as NaN and stringify to 'nan', which is both
    # alphabetic and three characters long, so it is ruled out once here
    # rather than in every branch.
    has_level = level_code != 'nan'
    is_leaf = len(leaf_code) == 4

    if has_level and level_code.isalpha():
        a_name = f"{level_code}>{label}"
        a_dic = tree.setdefault(a_name, {})
        continue

    if len(level_code) == 2:
        b_name = f"{level_code}>{label}"
        b_dic = a_dic.setdefault(b_name, {})
        continue

    if has_level and len(level_code) == 3:
        c_name = f"{level_code}>{label}"
        c_dic = b_dic.setdefault(c_name, {})

    # A leaf may share its row with the level-3 code above or sit on a row
    # of its own; either way it is recorded under the current ancestors.
    if is_leaf:
        d_name = f"{leaf_code}>{label}"
        c_dic.setdefault(d_name, {})
        li.append({
            '门类20': a_name,
            '大类97': b_name,
            '中类473': c_name,
            '小类1382': d_name,
        })

df = pd.DataFrame(li)
df2 = df.iloc[:, :2].drop_duplicates()  # distinct (level 1, level 2) pairs
df3 = df.iloc[:, :3].drop_duplicates()  # distinct (level 1, level 2, level 3) triples

output_path = './output/分类表.xlsx'
# Create the target directory up front so a fresh checkout does not fail
# at write time after all the parsing work is done.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with pd.ExcelWriter(output_path) as w:
    df2.to_excel(w, sheet_name='二级到97大类', index=True)
    df3.to_excel(w, sheet_name='三级到473类', index=True)
    # NOTE(review): this sheet name ends in '.xlsx', which looks like a
    # copy-paste slip; left unchanged in case downstream readers rely on it.
    df.to_excel(w, sheet_name='四级到1382小类.xlsx')
print(666)